package com.atguigu.bigdata.spark.zzgcore.rdd.operator.transform

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * @Classname Spark07_RDD_Operation_Transform_Test
 * @Description Filter apache.log records, keeping only the lines logged on 17/05/2015 (filter operator practice)
 * @Date 2023/9/20 15:23
 * @Author zhuzhenguo
 */
object Spark07_RDD_Operation_Transform_Test {
  def main(args: Array[String]): Unit = {
    // Set up the environment; the * in local[*] means use all cores currently available on this machine
    val sparkConf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)
    val rdd: RDD[String] = sc.textFile("datas/apache.log")
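    // Note: apache.log is assumed to be a space-delimited access log whose 4th field (index 3)
    // holds the request time in a form like 17/05/2015:10:05:03; adjust the index if the layout differs.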

    // Keep only the lines whose timestamp falls on 17/05/2015, then collect and print them on the driver
    rdd.filter(
      line => {
        val datas: Array[String] = line.split(" ")
        val time: String = datas(3)
        time.startsWith("17/05/2015")
      }
    ).collect().foreach(println)

    // Stop the SparkContext to release resources
    sc.stop()
  }
}
